# Script to get info on amendments vs readings
# Info taken from the headings of division pages on PublicWhip.org
# This script was run in May 2017.
# The output files "divisions_wFreeVote_reading_1015.csv" and "divisions_wFreeVote_reading_0510.csv" are in the CalcData folder
# This script produces columns 2-5 in table A5 (appendix). Column 1 of A5 is from script?? -- Braeuninger et al replication


## DO NOT RUN THIS SECTION!
## The two scraping blocks below are for information only: they get the data from the web. To use the data we used, skip to the analysis section that follows the double row of #'s (line 66 of the original script).
####################
rm(list=ls())

library(rvest)

## Path 
path <- "~/Dropbox (Personal)/Rebel Summaries/APSR_SKLLO_Repfiles/"

## Division-level data for 2010-2015; the PubWhipUrl column gives the page
## that is fetched for each row.
div.info <- read.csv(paste0(path, "RawData/2010-2015/divisions.csv"))

## Indicators for the type of division, filled in row by row below.
## They stay NA for any row whose page has no <h1> heading.
div.info$secondread <- rep(NA, nrow(div.info))
div.info$thirdread <- rep(NA, nrow(div.info))
div.info$oppday <- rep(NA, nrow(div.info))
div.info$clause <- rep(NA, nrow(div.info))

## seq_len() rather than 1:nrow() so that an empty input gives zero iterations
for (i in seq_len(nrow(div.info))){
	url <- as.character(div.info$PubWhipUrl[i])
	pubwhip <- html_session(url)
	pubwhippage <- read_html(pubwhip)
	heading <- as.character(html_nodes(pubwhippage, "h1"))
	## No <h1> on the page: nothing to classify. Leave the NAs in place and
	## flag the row instead of failing on a zero-length assignment.
	if (length(heading) == 0) {
		warning("No h1 heading found for row ", i, ": ", url, call. = FALSE)
		next
	}
	## any() collapses the match to a single TRUE/FALSE even when the page
	## has several <h1> nodes (case-insensitive match on the heading text).
	div.info$secondread[i] <- any(grepl('Second Reading', heading, ignore.case = TRUE))
	div.info$thirdread[i] <- any(grepl('Third Reading', heading, ignore.case = TRUE))
	div.info$oppday[i] <- any(grepl('Opposition Day', heading, ignore.case = TRUE))
	div.info$clause[i] <- any(grepl('Clause', heading, ignore.case = TRUE))
}

## Save the divisions data with the four indicator columns appended
write.csv(div.info, file = paste0(path, "CalcData/divisions_wFreeVote_reading_1015.csv"))

#######################
rm(list=ls())

## Loaded here as well so that this section can be run on its own
library(rvest)

path <- "~/Dropbox (Personal)/Rebel Summaries/APSR_SKLLO_Repfiles/"

## Division-level data for 2005-2010; the PubWhipUrl column gives the page
## that is fetched for each row.
div.info <- read.csv(paste0(path, "RawData/2005-2010/divisions.csv"))

## Indicators for the type of division, filled in row by row below.
## They stay NA for any row whose page has no <h1> heading.
div.info$secondread <- rep(NA, nrow(div.info))
div.info$thirdread <- rep(NA, nrow(div.info))
div.info$oppday <- rep(NA, nrow(div.info))
div.info$clause <- rep(NA, nrow(div.info))

## seq_len() rather than 1:nrow() so that an empty input gives zero iterations
for (i in seq_len(nrow(div.info))){
	url <- as.character(div.info$PubWhipUrl[i])
	pubwhip <- html_session(url)
	pubwhippage <- read_html(pubwhip)
	heading <- as.character(html_nodes(pubwhippage, "h1"))
	## No <h1> on the page: nothing to classify. Leave the NAs in place and
	## flag the row instead of failing on a zero-length assignment.
	if (length(heading) == 0) {
		warning("No h1 heading found for row ", i, ": ", url, call. = FALSE)
		next
	}
	## any() collapses the match to a single TRUE/FALSE even when the page
	## has several <h1> nodes (case-insensitive match on the heading text).
	div.info$secondread[i] <- any(grepl('Second Reading', heading, ignore.case = TRUE))
	div.info$thirdread[i] <- any(grepl('Third Reading', heading, ignore.case = TRUE))
	div.info$oppday[i] <- any(grepl('Opposition Day', heading, ignore.case = TRUE))
	div.info$clause[i] <- any(grepl('Clause', heading, ignore.case = TRUE))
}

## Save the divisions data with the four indicator columns appended
write.csv(div.info, file = paste0(path, "CalcData/divisions_wFreeVote_reading_0510.csv"))


####################################
####################################
rm(list=ls())

library(readstata13)

path <- "~/Dropbox (Personal)/Rebel Summaries/APSR_SKLLO_Repfiles/"

## path already ends in "/", so paste0() avoids a doubled separator
setwd(paste0(path, "CalcData"))

## Divisions data with the heading indicators added by the scraping sections
div.info0510 <- read.csv("divisions_wFreeVote_reading_0510.csv")

div.info1015 <- read.csv("divisions_wFreeVote_reading_1015.csv")


## "other" = division whose heading matched none of the four categories.
## Testing rowSums == 0 (rather than 1 - rowSums) keeps this a 0/1 indicator
## even when a heading matches more than one category, which would otherwise
## give negative values and distort the counts and averages below.
div.info1015$other <- as.numeric(rowSums(cbind(div.info1015$secondread, div.info1015$thirdread, div.info1015$oppday, div.info1015$clause)) == 0)

div.info0510$other <- as.numeric(rowSums(cbind(div.info0510$secondread, div.info0510$thirdread, div.info0510$oppday, div.info0510$clause)) == 0)


## Average number of rebels per division among the divisions where `flag`
## holds. `flag` may be logical or a 0/1 indicator. The rows are selected
## directly rather than by position in a by() result, so the answer does not
## depend on how many distinct values `flag` takes. Returns NaN when no
## division is flagged.
rebels_per_division <- function(rebels, flag) {
	flagged <- flag == 1
	sum(rebels[flagged]) / sum(flagged)
}


## 2005-2010: for each type, average rebels per division, then number of divisions
rebels_per_division(div.info0510$Rebels, div.info0510$oppday)
sum(div.info0510$oppday)

rebels_per_division(div.info0510$Rebels, div.info0510$secondread)
sum(div.info0510$secondread)

rebels_per_division(div.info0510$Rebels, div.info0510$thirdread)
sum(div.info0510$thirdread)

rebels_per_division(div.info0510$Rebels, div.info0510$clause)
sum(div.info0510$clause)

rebels_per_division(div.info0510$Rebels, div.info0510$other)
sum(div.info0510$other)



## 2010-2015: for each type, average rebels per division, then number of divisions
rebels_per_division(div.info1015$Rebels, div.info1015$oppday)
sum(div.info1015$oppday)

rebels_per_division(div.info1015$Rebels, div.info1015$secondread)
sum(div.info1015$secondread)

rebels_per_division(div.info1015$Rebels, div.info1015$thirdread)
sum(div.info1015$thirdread)

rebels_per_division(div.info1015$Rebels, div.info1015$clause)
sum(div.info1015$clause)

rebels_per_division(div.info1015$Rebels, div.info1015$other)
sum(div.info1015$other)

